import numpy as np
import matplotlib.pyplot as plt
import mkl
from IPython.display import display, HTML
from matplotlib.animation import FuncAnimation
# Fix the RNG seed so every run of the notebook is reproducible.
np.random.seed(1234)
# Cap MKL at two threads so the notebook does not hog the machine.
mkl.set_num_threads(2)
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
plt.rcParams["figure.figsize"] = [16, 9]
# tip: hover the cursor over a numpy function and press shift+tab for its docs
Implement sample_points function. This function should sample count points from a $d$-dimensional cube $[-1, 1]^d$ and return a tuple with two NumPy arrays:
Point coordinates should be stored row-wise.
def sample_points(count, d):
    """Draw `count` points uniformly from the hypercube [-1, 1]^d.

    Returns a tuple ``(inner, outer)`` of NumPy arrays with points
    stored row-wise: first the points whose Euclidean norm is <= 1
    (inside the unit ball), then all remaining points.
    """
    pts = np.random.uniform(low=-1, high=1, size=(count, d))
    # Euclidean distance of every point from the origin.
    dist = np.sqrt((pts ** 2).sum(axis=1))
    # Cut-off radius is 1 (the unit ball inscribed in the cube).
    inside = dist <= 1
    return pts[inside, :], pts[~inside, :]
Implement approx_pi_mc function, which uses a draw from sample_points to estimate the value of $\pi$. Arguments are:
sample - a tuple returned by sample_points with d=2. Note that you can estimate $\pi$ by approximating the surface area of a $2$-dimensional ball (disc) with a given radius. This is nothing else than approximating the integral of an indicator function over the surface of that ball. Plain Monte Carlo is good at approximating integrals when dimensionality is not overly large.
def approx_pi_mc(sample):
    """Estimate pi from a 2-D rejection sample.

    `sample` is the (inner, outer) pair produced by sample_points with
    d=2. The unit disc inscribed in the square [-1, 1]^2 covers
    pi * r^2 / (2r)^2 = pi / 4 of its area, so the acceptance rate
    times 4 is a Monte Carlo estimate of pi.
    """
    accepted, rejected = sample
    total = len(accepted) + len(rejected)
    return 4 * len(accepted) / total
# Sample sizes 10^1 .. 10^6 — one subplot panel per size.
counts = [10 ** i for i in range(1, 7)]
figure, axis = plt.subplots(2, 3, figsize=(16, 9))
def update(idx):
    """Render Monte Carlo pi-estimate panel number `idx` into the 2x3 grid.

    Uses the module-level `counts` list and `axis` subplot grid: draws
    counts[idx] points in [-1, 1]^2, scatters accepted (red) and
    rejected (blue) points, and titles the panel with the estimate.
    """
    # Fill the 2x3 grid column by column (rows alternate, columns advance).
    row, col = idx % 2, idx // 2
    n = counts[idx]
    sample = sample_points(n, 2)
    inner, outer = sample
    pi = approx_pi_mc(sample)
    # Raw f-string: '\p' in '$\pi$' is an invalid escape in a plain string
    # literal (DeprecationWarning, a SyntaxError in future Python versions).
    axis[row][col].set_title(rf'size = {n}, $\pi$ = {pi:.3f}', fontsize=10)
    axis[row][col].scatter(inner[:, 0], inner[:, 1], c='r', s=2)
    axis[row][col].scatter(outer[:, 0], outer[:, 1], c='b', s=2)
    axis[row][col].set_xlim((-1, 1))
    axis[row][col].set_ylim((-1, 1))
    # Hide ticks — the panels are qualitative illustrations.
    axis[row][col].set_xticks([])
    axis[row][col].set_yticks([])
# Draw every panel, then display the finished figure.
for i in range(len(counts)):
    update(i)
plt.show()
Use sample_points to estimate the ratio between volume of a $d$-dimensional ball and volume of a $d$-dimensional inscribing hypercube. Use ratio_sample_size points to estimate that ratio.
ratio_sample_size = 10000
def volume_ratio(d):
    """Monte Carlo estimate of vol(unit d-ball) / vol([-1, 1]^d cube).

    Draws `ratio_sample_size` points uniformly from the hypercube and
    returns the fraction that land inside the unit ball.
    """
    accepted, rejected = sample_points(ratio_sample_size, d)
    total = len(accepted) + len(rejected)
    return len(accepted) / total
Let's see how that ratio changes with the number of dimensions.
# Estimate the ball-to-cube volume ratio for d = 1 .. 49.
dims = np.arange(1, 50, 1)
# np.vectorize is just a convenience loop over dims, not a vectorized speedup.
ratios = np.vectorize(volume_ratio)(dims)
figure, axis = plt.subplots(1, 2, figsize=(15, 5))
axis[0].grid()
axis[0].plot(dims, ratios)
axis[1].grid()
# Log scale on the right-hand panel: the ratio shrinks rapidly with d.
axis[1].semilogy()
axis[1].plot(dims, ratios);
Rejection sampling won't work above approx. 10 dimensions - most samples will fall outside the ball and be rejected. But wait! How about we:
Implement sample_directions function, which returns points sampled randomly from the surface of a $d$-dimensional ball centered at origin and with unit radius. sample_directions should return a NumPy array, where each row is a sampled point (coordinates).
def sample_directions(count, d):
    """Sample `count` directions uniformly from the unit sphere in R^d.

    Normalizing points drawn uniformly from the cube [-1, 1]^d is NOT
    uniform on the sphere — directions toward the cube's corners are
    over-represented. Drawing standard Gaussian vectors instead gives a
    rotationally invariant distribution, so the normalized vectors are
    exactly uniform on the sphere.

    Returns a (count, d) NumPy array; each row is a unit vector.
    """
    v = np.random.normal(size=(count, d))
    return v / np.linalg.norm(v, axis=1, keepdims=True)
Let's generate some points.
sample_size = 200000
# try 20 000 or 200 000
# First we sample directions:
w = sample_directions(sample_size, 2)
# Then we sample radii:
# NOTE(review): uniform radii do NOT produce a uniform distribution over the
# disc (for that, r should be drawn as sqrt(U) in 2-D) — presumably this is
# intentional, given the question posed below the plot; confirm.
r = np.random.uniform(low = 0.0, high = 1.0, size = sample_size)
# And now we can generate points: scale each unit direction by its radius.
ball_samples = w * r.reshape((-1, 1))
And plot the distribution.
# Scatter the sampled points to inspect their distribution over the disc.
plt.subplots(figsize=(10, 10))
plt.scatter(ball_samples[:, 0], ball_samples[:, 1], s=0.1)
plt.plot();
Is this a uniform distribution?
Sampling in many dimensions is not always easy - in fact it's often very hard. Later in the course we will talk about Markov Chain Monte Carlo (MCMC) methods, which are better at sampling from complicated high-dimensional distributions.
Let's say we have an unfair coin, which falls heads 90% of times and tails 10% of times.
def unfair_coin():
    """One flip of a biased coin: returns 1 ('heads') w.p. 0.9, 0 w.p. 0.1."""
    return int(np.random.random() >= 0.1)
Your task is to implement make_coin_fair function which returns unbiased and uncorrelated samples from $\{0, 1\}$, each with probability $0.5$.
Note: the only source of randomness you are allowed to use is unfair_coin function. Whatever you implement, you should be able to justify that the returned samples are fair and uncorrelated.
def make_coin_fair():
    """Von Neumann extractor: a fair {0, 1} sample from the biased coin.

    Flip twice and keep the first result only when the two flips differ.
    Since P(1,0) = P(0,1) = p*(1-p), conditioned on a disagreement the
    first flip is 0 or 1 with probability exactly 1/2, and successive
    calls use independent flips, so the outputs are uncorrelated.
    """
    while True:
        first, second = unfair_coin(), unfair_coin()
        if first != second:
            return first
test_size = 5000
samples = np.array([make_coin_fair() for i in range(test_size)])
# Empirical frequencies of heads (1) and tails (0); both should be near 0.5.
heads = np.sum(samples == 1) / samples.shape[0]
tails = np.sum(samples == 0) / samples.shape[0]
# NOTE(review): with mode='valid' this is sum(samples^2)/test_size, which for
# 0/1 samples equals the fraction of heads — it is not a lag-based
# autocorrelation. Confirm this is the intended diagnostic.
acorr = np.correlate(samples, samples, mode='valid')[0] / test_size
figure, axis = plt.subplots(figsize=(5, 5))
axis.bar([0, 1], [tails, heads])
axis.set_xticks([0, 1])
axis.set_xticklabels(['tails', 'heads'])
display(HTML(f'<h4>(Convolutional) Auto correlation: {acorr}</h4>'))